This document explores the mango data from SIAP (Mexico).

- Data have been summarized at the state level.
- 26 states report mango production during the period 1980 - 2016.
- There are 962 observations in total (26 states x 37 years), i.e. one observation per year for each state.
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.0 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 1.0.2
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## Warning: package 'dplyr' was built under R version 3.6.2
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Load the raw SIAP records.
# NOTE(review): absolute, machine-specific path - the script only runs where
# this file exists at exactly this location.
SIAP <- read.csv("/Users/erikaluna/R\ Studio/msc_thesis/SIAP.csv")

# Aggregate the mango records to one row per year and state.
# Yield is total production divided by total harvested area, rounded to two
# decimals; a group whose harvested area sums to zero therefore gets NaN/Inf.
# `.groups = "drop"` returns an ungrouped table, so no grouping leaks into
# later steps and summarise() does not emit its regrouping message.
one_crop <- SIAP %>%
  filter(crop == "mango") %>%
  group_by(year, state) %>%
  summarise(
    ag_yield = round(sum(production) / sum(harvested), digits = 2),
    ag_prod = sum(production),
    ag_planted = sum(planted),
    ag_harv = sum(harvested),
    ag_losses = sum(losses),
    .groups = "drop"
  )
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
A data frame with every year for every state that grows mango.
# Build the complete state x year grid so that every state has a row for every
# year of the study period, whether or not it reported data that year.
study_years <- 1980:2016

# The states that report mango production.
mango_states <- c(
  "baja california", "baja california sur", "campeche",
  "chiapas", "colima", "durango",
  "guanajuato", "guerrero", "hidalgo", "jalisco",
  "mexico", "michoacan", "morelos", "nayarit", "oaxaca",
  "puebla", "queretaro", "quintana roo",
  "san luis potosi", "sinaloa", "sonora", "tabasco",
  "tamaulipas", "veracruz", "yucatan", "zacatecas"
)

# Repetition counts come from the vectors themselves, so adding or removing a
# state or a year cannot leave the two columns with mismatched lengths.
period <- tibble(year = rep(study_years, times = length(mango_states)))
states <- tibble(state = rep(mango_states, times = length(study_years))) %>%
  arrange(state)

# After sorting, each state occupies one contiguous run of rows, which lines up
# with the repeating sequence of years in `period`.
states_period <- cbind(states, period)
# Attach the aggregated mango figures to the full state x year grid; grid rows
# with no matching record keep NA in every `ag_*` column.
# `i` and `t` are numeric indices for state and year (1, 2, ... in sorted order).
# The result is deliberately left ungrouped: the former `group_by(year)` had no
# effect on `arrange()` and only left a hidden grouping on `mango` that every
# later step had to override.
mango <- states_period %>%
  left_join(one_crop, by = c("state", "year")) %>%
  mutate(
    i = as.numeric(factor(state)),
    t = as.numeric(factor(year))
  ) %>%
  arrange(state, year) %>%
  as_tibble()
# Interactive table of the full state x year panel.
DT::datatable(mango)
# Per-state descriptive statistics of production (missing years are ignored),
# rendered as a markdown table.
mango %>%
  group_by(state) %>%
  summarise(
    max_prod = max(ag_prod, na.rm = TRUE),
    min_prod = min(ag_prod, na.rm = TRUE),
    range_prod = max_prod - min_prod,
    sd_prod = sd(ag_prod, na.rm = TRUE),
    mean_prod = mean(ag_prod, na.rm = TRUE),
    median_prod = median(ag_prod, na.rm = TRUE)
  ) %>%
  knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| state | max_prod | min_prod | range_prod | sd_prod | mean_prod | median_prod |
|---|---|---|---|---|---|---|
| baja california | 0.00 | 0.00 | 0.00 | NA | 0.0000 | 0.000 |
| baja california sur | 9913.65 | 578.00 | 9335.65 | 2230.2162 | 3563.9111 | 2789.800 |
| campeche | 42933.70 | 3930.00 | 39003.70 | 11796.7935 | 23410.3406 | 26213.720 |
| chiapas | 238429.55 | 37760.00 | 200669.55 | 63980.3042 | 123791.2263 | 131164.700 |
| colima | 91294.00 | 9623.00 | 81671.00 | 19869.9166 | 48479.2611 | 49431.560 |
| durango | 1576.42 | 65.00 | 1511.42 | 492.2448 | 818.9611 | 720.000 |
| guanajuato | 405.00 | 0.00 | 405.00 | 143.2752 | 205.7000 | 211.000 |
| guerrero | 372282.78 | 36575.00 | 335707.78 | 93659.3813 | 215997.6649 | 189171.000 |
| hidalgo | 2568.00 | 0.00 | 2568.00 | 534.0370 | 701.3468 | 588.965 |
| jalisco | 113607.55 | 27022.00 | 86585.55 | 14033.8712 | 51651.2451 | 48642.000 |
| mexico | 9620.00 | 1545.00 | 8075.00 | 1583.3664 | 4304.7063 | 4499.750 |
| michoacan | 144675.07 | 19434.00 | 125241.07 | 36260.2297 | 95737.3186 | 109750.000 |
| morelos | 10726.00 | 5079.99 | 5646.01 | 1327.1275 | 7905.7977 | 8055.000 |
| nayarit | 364814.00 | 52362.00 | 312452.00 | 81069.9913 | 186393.2317 | 181319.350 |
| oaxaca | 275120.00 | 73573.00 | 201547.00 | 33085.2491 | 174068.0546 | 177690.000 |
| puebla | 2507.00 | 122.70 | 2384.30 | 603.4554 | 1059.7005 | 920.000 |
| queretaro | 1414.00 | 102.00 | 1312.00 | 297.5532 | 522.3822 | 460.000 |
| quintana roo | 230.00 | 20.00 | 210.00 | 63.8367 | 114.8667 | 102.000 |
| san luis potosi | 19429.00 | 775.00 | 18654.00 | 3118.7475 | 4240.9571 | 4008.000 |
| sinaloa | 339530.40 | 11897.00 | 327633.40 | 79157.4889 | 158816.6494 | 158796.000 |
| sonora | 3916.00 | 0.00 | 3916.00 | 676.7596 | 539.5373 | 349.500 |
| tabasco | 9348.00 | 980.30 | 8367.70 | 2557.4636 | 3826.4303 | 3152.500 |
| tamaulipas | 12984.98 | 0.00 | 12984.98 | 3391.8427 | 6345.0753 | 6737.000 |
| veracruz | 311128.00 | 87844.80 | 223283.20 | 65246.1948 | 189157.0960 | 182775.000 |
| yucatan | 16149.00 | 1113.40 | 15035.60 | 2807.8291 | 5158.2930 | 5086.000 |
| zacatecas | 1419.00 | 21.00 | 1398.00 | 298.1457 | 334.0278 | 169.500 |
# Distribution of yearly production for each state.
ggplot(mango, aes(x = state, y = ag_prod)) +
  geom_boxplot() +
  labs(x = "State", y = "Production (tonnes)") +
  # scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
## Warning: Removed 127 rows containing non-finite values (stat_boxplot).
# Count, for each state, the years that have a production value.
number_obs <- summarise(
  group_by(mango, state),
  obs = sum(!is.na(ag_prod))
)
## `summarise()` ungrouping output (override with `.groups` argument)
# Keep the states with more than 34 yearly production observations.
mango_complete <- filter(number_obs, obs > 34)
mango_complete
## # A tibble: 20 x 2
## state obs
## <chr> <int>
## 1 baja california sur 35
## 2 campeche 35
## 3 chiapas 35
## 4 colima 35
## 5 durango 35
## 6 guerrero 35
## 7 jalisco 35
## 8 mexico 35
## 9 michoacan 35
## 10 morelos 35
## 11 nayarit 35
## 12 oaxaca 35
## 13 puebla 37
## 14 queretaro 37
## 15 san luis potosi 35
## 16 sinaloa 35
## 17 sonora 37
## 18 veracruz 35
## 19 yucatan 37
## 20 zacatecas 36
# Production time series, one facet per state with its own y-axis.
# Facets outlined in red are the states listed in `mango_complete`.
mango_ts <- ggplot(mango, aes(x = year, y = ag_prod)) +
  geom_line() +
  labs(
    title = "Mango Production 1980 - 2016",
    x = "Years",
    y = "Production (tonnes)"
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)) +
  geom_rect(
    data = filter(mango, state %in% mango_complete$state),
    xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf,
    fill = NA, colour = "red"
  ) +
  facet_wrap(~state, scales = "free_y", ncol = 5)
# Shared y-axis alternative: facet_wrap(~state, ncol = 5)
mango_ts
## Warning: Removed 5 rows containing missing values (geom_path).
# Per-state descriptive statistics of yield (missing years are ignored),
# rendered as a markdown table.
# A state with no yield value at all would make max()/min() warn and return
# -Inf/Inf, and mean() return NaN; such a state is reported as NA instead.
mango %>%
  group_by(state) %>%
  summarise(
    # Number of usable yield values in this state; helper column, dropped below.
    n_yield = sum(!is.na(ag_yield)),
    max_yield = if (n_yield > 0) max(ag_yield, na.rm = TRUE) else NA_real_,
    min_yield = if (n_yield > 0) min(ag_yield, na.rm = TRUE) else NA_real_,
    range_yield = max_yield - min_yield,
    sd_yield = sd(ag_yield, na.rm = TRUE),
    mean_yield = if (n_yield > 0) mean(ag_yield, na.rm = TRUE) else NA_real_,
    median_yield = median(ag_yield, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  select(-n_yield) %>%
  knitr::kable()
## Warning in max(ag_yield, na.rm = T): no non-missing arguments to max;
## returning -Inf
## Warning in min(ag_yield, na.rm = T): no non-missing arguments to min;
## returning Inf
## Warning in max(ag_yield, na.rm = T): no non-missing arguments to max;
## returning -Inf
## Warning in min(ag_yield, na.rm = T): no non-missing arguments to min;
## returning Inf
## `summarise()` ungrouping output (override with `.groups` argument)
| state | max_yield | min_yield | range_yield | sd_yield | mean_yield | median_yield |
|---|---|---|---|---|---|---|
| baja california | -Inf | Inf | -Inf | NA | NaN | NA |
| baja california sur | 13.11 | 2.98 | 10.13 | 2.4665390 | 7.940286 | 7.980 |
| campeche | 16.23 | 3.54 | 12.69 | 3.1033450 | 11.527143 | 12.060 |
| chiapas | 16.45 | 6.36 | 10.09 | 2.7139152 | 9.633714 | 8.430 |
| colima | 18.93 | 6.50 | 12.43 | 3.4242340 | 12.265429 | 12.870 |
| durango | 6.50 | 2.14 | 4.36 | 1.1974801 | 4.440571 | 4.320 |
| guanajuato | 8.58 | 4.04 | 4.54 | 1.3982515 | 7.222500 | 7.480 |
| guerrero | 21.80 | 10.14 | 11.66 | 2.4896751 | 12.723429 | 11.640 |
| hidalgo | 13.52 | 4.00 | 9.52 | 1.9300048 | 7.066061 | 7.170 |
| jalisco | 14.59 | 7.03 | 7.56 | 1.5088998 | 9.834571 | 9.500 |
| mexico | 15.93 | 5.49 | 10.44 | 1.9393346 | 8.536000 | 8.390 |
| michoacan | 9.47 | 5.09 | 4.38 | 0.9597775 | 6.655143 | 6.380 |
| morelos | 20.04 | 9.57 | 10.47 | 2.4818776 | 13.991143 | 14.230 |
| nayarit | 53.97 | 6.70 | 47.27 | 7.5669918 | 12.153714 | 10.990 |
| oaxaca | 37.48 | 7.34 | 30.14 | 5.5883251 | 12.836000 | 11.400 |
| puebla | 12.23 | 5.33 | 6.90 | 1.5156813 | 8.591351 | 8.230 |
| queretaro | 19.64 | 2.45 | 17.19 | 2.6846601 | 7.150540 | 6.700 |
| quintana roo | 10.00 | 2.23 | 7.77 | 2.8161105 | 5.307333 | 4.590 |
| san luis potosi | 12.72 | 3.47 | 9.25 | 1.9822870 | 8.588286 | 8.250 |
| sinaloa | 14.99 | 4.02 | 10.97 | 2.7130059 | 10.136000 | 10.120 |
| sonora | 22.00 | 1.00 | 21.00 | 5.6936563 | 13.683143 | 14.390 |
| tabasco | 20.68 | 4.90 | 15.78 | 3.1584875 | 7.416250 | 6.055 |
| tamaulipas | 15.24 | 2.21 | 13.03 | 2.7185624 | 6.505758 | 5.700 |
| veracruz | 10.13 | 3.54 | 6.59 | 1.5197955 | 7.102571 | 7.360 |
| yucatan | 24.39 | 9.20 | 15.19 | 2.7960165 | 13.474054 | 13.300 |
| zacatecas | 15.42 | 2.53 | 12.89 | 3.5636149 | 7.518333 | 6.625 |
# Distribution of yearly yield for each state.
ggplot(mango, aes(x = state, y = ag_yield)) +
  geom_boxplot() +
  labs(x = "State", y = "Yield (tonnes/ha)") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
## Warning: Removed 134 rows containing non-finite values (stat_boxplot).
# Count, for each state, the years that have a yield value.
number_obs <- summarise(
  group_by(mango, state),
  obs = sum(!is.na(ag_yield))
)
## `summarise()` ungrouping output (override with `.groups` argument)
# Keep the states with more than 34 yearly yield observations.
mango_complete <- filter(number_obs, obs > 34)
mango_complete
## # A tibble: 20 x 2
## state obs
## <chr> <int>
## 1 baja california sur 35
## 2 campeche 35
## 3 chiapas 35
## 4 colima 35
## 5 durango 35
## 6 guerrero 35
## 7 jalisco 35
## 8 mexico 35
## 9 michoacan 35
## 10 morelos 35
## 11 nayarit 35
## 12 oaxaca 35
## 13 puebla 37
## 14 queretaro 37
## 15 san luis potosi 35
## 16 sinaloa 35
## 17 sonora 35
## 18 veracruz 35
## 19 yucatan 37
## 20 zacatecas 36
# Yield time series, one facet per state with its own y-axis.
# Facets outlined in red are the states listed in `mango_complete`.
mango_ts <- ggplot(mango, aes(x = year, y = ag_yield)) +
  geom_line() +
  labs(
    title = "Mango Yields 1980 - 2016",
    x = "Years",
    y = "Yield (tonnes/ha)"
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)) +
  geom_rect(
    data = filter(mango, state %in% mango_complete$state),
    xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf,
    fill = NA, colour = "red"
  ) +
  facet_wrap(~state, scales = "free_y", ncol = 5)
# Shared y-axis alternative: facet_wrap(~state, ncol = 5)
mango_ts
## Warning: Removed 37 rows containing missing values (geom_path).
# Per-state descriptive statistics of harvested area (missing years are
# ignored), rendered as a markdown table.
mango %>%
  group_by(state) %>%
  summarise(
    max_area = max(ag_harv, na.rm = TRUE),
    min_area = min(ag_harv, na.rm = TRUE),
    range_area = max_area - min_area,
    sd_area = sd(ag_harv, na.rm = TRUE),
    mean_area = mean(ag_harv, na.rm = TRUE),
    median_area = median(ag_harv, na.rm = TRUE)
  ) %>%
  knitr::kable()
## `summarise()` ungrouping output (override with `.groups` argument)
| state | max_area | min_area | range_area | sd_area | mean_area | median_area |
|---|---|---|---|---|---|---|
| baja california | 0.00 | 0.00 | 0.00 | NA | 0.00000 | 0.000 |
| baja california sur | 1058.50 | 194.00 | 864.50 | 280.88033 | 482.87432 | 343.000 |
| campeche | 2746.00 | 1008.00 | 1738.00 | 656.95773 | 1917.71714 | 2172.500 |
| chiapas | 32840.57 | 4720.00 | 28120.57 | 8545.53501 | 14094.29543 | 15534.000 |
| colima | 5357.00 | 1476.00 | 3881.00 | 931.14604 | 3809.70556 | 3837.625 |
| durango | 418.00 | 11.00 | 407.00 | 134.83013 | 197.88571 | 120.000 |
| guanajuato | 50.00 | 0.00 | 50.00 | 18.14632 | 28.20000 | 33.000 |
| guerrero | 24738.40 | 3429.00 | 21309.40 | 6195.62129 | 16828.66486 | 16993.000 |
| hidalgo | 285.00 | 0.00 | 285.00 | 50.84878 | 96.08824 | 102.000 |
| jalisco | 7786.84 | 3444.00 | 4342.84 | 867.18085 | 5225.99086 | 5135.000 |
| mexico | 668.00 | 222.00 | 446.00 | 118.41836 | 490.45270 | 523.500 |
| michoacan | 22520.24 | 2053.00 | 20467.24 | 6375.98865 | 14987.50086 | 18080.000 |
| morelos | 955.00 | 350.93 | 604.07 | 174.28874 | 577.96297 | 565.000 |
| nayarit | 25032.41 | 6307.00 | 18725.41 | 5785.05831 | 16079.22571 | 17795.000 |
| oaxaca | 18193.00 | 5663.00 | 12530.00 | 3382.99136 | 14564.89371 | 14800.000 |
| puebla | 227.00 | 23.00 | 204.00 | 52.74027 | 119.24324 | 122.000 |
| queretaro | 154.00 | 12.00 | 142.00 | 30.09507 | 73.18919 | 68.000 |
| quintana roo | 52.00 | 5.00 | 47.00 | 15.03266 | 24.53333 | 22.000 |
| san luis potosi | 1528.00 | 125.00 | 1403.00 | 260.78664 | 467.67568 | 412.000 |
| sinaloa | 31603.79 | 1158.00 | 30445.79 | 8395.77506 | 16643.79343 | 15073.000 |
| sonora | 289.00 | 0.00 | 289.00 | 48.82309 | 39.97297 | 29.000 |
| tabasco | 1450.00 | 163.00 | 1287.00 | 306.82520 | 512.90625 | 432.500 |
| tamaulipas | 2024.00 | 0.00 | 2024.00 | 390.58291 | 963.97278 | 1076.000 |
| veracruz | 38765.00 | 17771.21 | 20993.79 | 6352.42564 | 26433.37200 | 26769.500 |
| yucatan | 1052.00 | 96.00 | 956.00 | 217.58591 | 392.02054 | 376.200 |
| zacatecas | 92.00 | 4.00 | 88.00 | 16.94997 | 39.11111 | 38.000 |
# Distribution of yearly harvested area for each state.
# The axis label previously read "Area (tonnes)"; `ag_harv` is harvested area,
# which the time-series plot of the same variable labels in hectares.
mango %>%
  ggplot(aes(state, ag_harv)) +
  geom_boxplot() +
  ylab("Area harvested (ha)") +
  xlab("State") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 0.5))
## Warning: Removed 116 rows containing non-finite values (stat_boxplot).
# Count, for each state, the years that have a harvested-area value.
number_obs <- summarise(
  group_by(mango, state),
  obs = sum(!is.na(ag_harv))
)
## `summarise()` ungrouping output (override with `.groups` argument)
# Keep the states with more than 34 yearly harvested-area observations.
mango_complete <- filter(number_obs, obs > 34)
mango_complete
## # A tibble: 21 x 2
## state obs
## <chr> <int>
## 1 baja california sur 37
## 2 campeche 35
## 3 chiapas 35
## 4 colima 36
## 5 durango 35
## 6 guerrero 35
## 7 jalisco 35
## 8 mexico 37
## 9 michoacan 35
## 10 morelos 37
## # … with 11 more rows
# Harvested-area time series, one facet per state with its own y-axis.
# Facets outlined in red are the states listed in `mango_complete`.
mango_ts <- ggplot(mango, aes(x = year, y = ag_harv)) +
  geom_line() +
  labs(
    title = "Mango - Area Harvested (ha) 1980 - 2016",
    x = "Years",
    y = "Area harvested (ha)"
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)) +
  geom_rect(
    data = filter(mango, state %in% mango_complete$state),
    xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf,
    fill = NA, colour = "red"
  ) +
  facet_wrap(~state, scales = "free_y", ncol = 5)
# Shared y-axis alternative: facet_wrap(~state, ncol = 5)
mango_ts
## Warning: Removed 5 rows containing missing values (geom_path).
Regression line
# Linear trend of yield over time, fitted separately for each state.
# The red outline must mark states whose *yield* series is (near-)complete.
# At this point `mango_complete` was last computed from harvested area, so the
# yield-based list is recomputed here instead of reusing that stale object.
yield_complete_states <- mango %>%
  group_by(state) %>%
  summarise(obs = sum(!is.na(ag_yield)), .groups = "drop") %>%
  filter(obs > 34) %>%
  pull(state)

mango_lm <- mango %>%
  ggplot(aes(year, ag_yield)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  ylab("Yield (tonnes/ha)") +
  xlab("Years") +
  ggtitle("mango Yields") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)) +
  geom_rect(
    data = subset(mango, state %in% yield_complete_states),
    fill = NA, colour = "red",
    xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf
  ) +
  facet_wrap(~state, scales = "free_y", ncol = 5)
mango_lm
## Warning: Removed 134 rows containing non-finite values (stat_smooth).
## Warning: Removed 134 rows containing missing values (geom_point).
# Load the maximum- and minimum-temperature summaries.
# NOTE(review): absolute, machine-specific paths.
tmax_summary <- read.csv("/Users/erikaluna/R\ Studio/climate_data/tmax_summary.csv")
tmin_summary <- read.csv("/Users/erikaluna/R\ Studio/climate_data/tmin_summary.csv")

# Attach the temperature summaries to the mango panel, matching on state and
# year; every mango row is kept even when no temperature record matches.
# yields_tmax <- dplyr::inner_join(mango, tmax_summary, by = "state")
yields_tmax <- mango %>%
  left_join(tmax_summary, by = c("state", "year"))
yield_tmin <- mango %>%
  left_join(tmin_summary, by = c("state", "year"))
# Yield against mean maximum temperature for five selected states, one facet
# per state.
mango_yields_temp <- ggplot(
  filter(
    yields_tmax,
    state %in% c("baja california sur", "durango", "nayarit", "sinaloa", "sonora")
  ),
  aes(x = mean_tmax, y = ag_yield)
) +
  geom_point() +
  facet_wrap(~state, scales = "free_y")
mango_yields_temp
## Warning: Removed 10 rows containing missing values (geom_point).
# Yield against mean minimum temperature for five selected states, one facet
# per state.
mango_yields_tmin <- ggplot(
  filter(
    yield_tmin,
    state %in% c("baja california sur", "durango", "nayarit", "sinaloa", "sonora")
  ),
  aes(x = mean_tmin, y = ag_yield)
) +
  geom_point() +
  facet_wrap(~state, scales = "free_y")
mango_yields_tmin
## Warning: Removed 14 rows containing missing values (geom_point).
# Yield against mean maximum temperature with a per-state linear fit and its
# confidence band, for five selected states.
mango_lm <- ggplot(
  filter(
    yields_tmax,
    state %in% c("baja california sur", "durango", "nayarit", "sinaloa", "sonora")
  ),
  aes(x = mean_tmax, y = ag_yield)
) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(title = "mango Yields") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)) +
  facet_wrap(~state, scales = "free_y")
mango_lm
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).
# Yield against mean minimum temperature with a per-state linear fit and its
# confidence band, for five selected states.
mango_lm <- ggplot(
  filter(
    yield_tmin,
    state %in% c("baja california sur", "durango", "nayarit", "sinaloa", "sonora")
  ),
  aes(x = mean_tmin, y = ag_yield)
) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(title = "mango Yields") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)) +
  facet_wrap(~state, scales = "free_y")
mango_lm
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 14 rows containing missing values (geom_point).